EIA API - Data Backfill (R Version)

The goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:

The data backfill process includes the following steps:

Load Libraries and Functions

library(dplyr)
library(EIAapi)
library(jsonlite)
library(gt)
library(plotly)
source("../R/eia_data.R")
meta_json <- read_json(path = "../metadata/series.json")
s <- meta_json[[1]]
series <- lapply(1:length(s), function(i) {
    return(data.frame(
        parent_id = s[[i]]$parent_id,
        parent_name = s[[i]]$parent_name,
        subba_id = s[[i]]$subba_id,
        subba_name = s[[i]]$subba_name
    ))
}) |>
    bind_rows()
api_path <- meta_json[[2]]
facets_template <- list(
    parent = NULL,
    subba = NULL
)

start <- as.POSIXct("2018-7-01 8:00:00")
end <- as.POSIXct("2024-2-18 0:00:00")
attr(start, "tzone") <- "UTC"
attr(end, "tzone") <- "UTC"

offset <- 2250

eia_api_key <- Sys.getenv("EIA_API_KEY")

meta_path <- "../metadata/ciso_log_R.csv"
data_path <- "../csv/ciso_grid_R.csv"
metadata <- eia_metadata(api_key = eia_api_key, api_path = api_path)
Warning: input string 'The api_key argument is missing... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is not valid... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is missing... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is not valid, must be a character object ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not pull the metadata... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not parse the metadata JSON... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is missing... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is not valid... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is missing... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is not valid, must be a character object ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not pull the metadata... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not parse the metadata JSON... ❌
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
print(names(metadata))
 [1] "id"                "name"              "description"      
 [4] "frequency"         "facets"            "data"             
 [7] "startPeriod"       "endPeriod"         "defaultDateFormat"
[10] "defaultFrequency"  "command"          
print(metadata$startPeriod)
[1] "2018-06-19T05"
print(metadata$endPeriod)
[1] "2024-02-24T08"
meta <- NULL
data <- NULL
for (i in 1:nrow(series)) {
    facets <- facets_template
    facets["parent"] <- series[i, "parent_id"]
    facets["subba"] <- series[i, "subba_id"]
    print(facets)

    temp <- eia_backfill(
        start = start,
        end = end,
        offset = offset,
        api_key = eia_api_key,
        api_path = paste(api_path, "data", sep = ""),
        facets = facets
    )
    index <- seq.POSIXt(from = start, to = end, by = "hour")
    ts_obj <- data.frame(period = index) |>
        left_join(temp, by = c("period" = "time"))

    meta_temp <- create_metadata(data = ts_obj, start = start, end = end, type = "backfill")
    meta_temp$index <- 1
    meta_df <- as.data.frame(meta_temp)

    meta <- rbind(meta, meta_df)
    data <- rbind(data, ts_obj)
}
$parent
[1] "CISO"

$subba
[1] "PGAE"

$parent
[1] "CISO"

$subba
[1] "SCE"

$parent
[1] "CISO"

$subba
[1] "SDGE"

$parent
[1] "CISO"

$subba
[1] "VEA"
print(meta)
  index parent subba                time               start        end
1     1   CISO  PGAE 2024-02-25 05:06:56 2018-07-01 08:00:00 2024-02-18
2     1   CISO   SCE 2024-02-25 05:07:20 2018-07-01 08:00:00 2024-02-18
3     1   CISO  SDGE 2024-02-25 05:07:43 2018-07-01 08:00:00 2024-02-18
4     1   CISO   VEA 2024-02-25 05:08:06 2018-07-01 08:00:00 2024-02-18
            start_act    end_act start_match end_match n_obs na     type update
1 2018-07-01 08:00:00 2024-02-18        TRUE      TRUE 49385 98 backfill  FALSE
2 2018-07-01 08:00:00 2024-02-18        TRUE      TRUE 49385 98 backfill  FALSE
3 2018-07-01 08:00:00 2024-02-18        TRUE      TRUE 49385 98 backfill  FALSE
4 2018-07-01 08:00:00 2024-02-18        TRUE      TRUE 49385 98 backfill  FALSE
  success                    comments
1   FALSE Missing values were found; 
2   FALSE Missing values were found; 
3   FALSE Missing values were found; 
4   FALSE Missing values were found; 
# The initial pull has some missing values
head(data)
               period subba               subba_name parent
1 2018-07-01 08:00:00  PGAE Pacific Gas and Electric   CISO
2 2018-07-01 09:00:00  PGAE Pacific Gas and Electric   CISO
3 2018-07-01 10:00:00  PGAE Pacific Gas and Electric   CISO
4 2018-07-01 11:00:00  PGAE Pacific Gas and Electric   CISO
5 2018-07-01 12:00:00  PGAE Pacific Gas and Electric   CISO
6 2018-07-01 13:00:00  PGAE Pacific Gas and Electric   CISO
                             parent_name value   value_units
1 California Independent System Operator 12522 megawatthours
2 California Independent System Operator 11745 megawatthours
3 California Independent System Operator 11200 megawatthours
4 California Independent System Operator 10822 megawatthours
5 California Independent System Operator 10644 megawatthours
6 California Independent System Operator 10559 megawatthours
# Save the data
d <- append_data(data_path = data_path, new_data = data, init = TRUE, save = TRUE)
[1] "Initial data pull"
[1] "Save the data to CSV file"
# Save the metadata
meta["success"] <- TRUE
meta["update"] <- TRUE
m <- append_metadata(meta_path = meta_path, new_meta = meta, init = TRUE, save = TRUE)
[1] "Saving the metadata file"

Plot the Series

We will use Plotly to visualize the series:

d <- data |> arrange(subba, period)

p <- plot_ly(d, x = ~period, y = ~value, color = ~subba, type = "scatter", mode = "lines")

p